static int setup_guestos(
int dom, int kernel_fd, int initrd_fd, unsigned long tot_pages,
unsigned long virt_load_addr, size_t ksize,
- dom0_builddomain_t *builddomain, int argc, char **argv, int args_start)
+ dom0_builddomain_t *builddomain, int argc, char **argv, int args_start,
+ unsigned long shared_info_frame)
{
l1_pgentry_t *vl1tab = NULL, *vl1e = NULL;
l2_pgentry_t *vl2tab = NULL, *vl2e = NULL;
unsigned long count, pt_start, i, j;
unsigned long initrd_addr = 0, initrd_len = 0;
start_info_t *start_info;
+ shared_info_t *shared_info;
int cmd_len;
memset(builddomain, 0, sizeof(*builddomain));
start_info->pt_base = virt_load_addr + ((tot_pages-1) << PAGE_SHIFT);
start_info->mod_start = initrd_addr;
start_info->mod_len = initrd_len;
+ start_info->nr_pages = tot_pages;
+ start_info->shared_info = shared_info_frame << PAGE_SHIFT;
+ start_info->dom_id = dom;
+ start_info->flags = 0;
cmd_len = 0;
for ( i = args_start; i < argc; i++ )
{
}
unmap_pfn(start_info);
+ /* shared_info page starts its life empty. */
+ shared_info = map_pfn(shared_info_frame);
+ memset(shared_info, 0, PAGE_SIZE);
+ unmap_pfn(shared_info);
+
/* Send the page update requests down to the hypervisor. */
if ( send_pgupdates(pgt_update_arr, num_pgt_updates) < 0 )
goto error_out;
* the 8-byte signature and 4-byte load address.
*/
size_t ksize;
- dom0_op_t launch_op;
+ dom0_op_t launch_op, op;
unsigned long load_addr;
long tot_pages;
int kernel_fd, initrd_fd = -1;
}
}
+ op.cmd = DOM0_GETDOMAININFO;
+ op.u.getdomaininfo.domain = domain_id;
+ if ( (do_dom0_op(&op) < 0) || (op.u.getdomaininfo.domain != domain_id) )
+ {
+ PERROR("Could not get info on domain");
+ return 1;
+ }
+ if ( (op.u.getdomaininfo.state != DOMSTATE_STOPPED) ||
+ (op.u.getdomaininfo.ctxt.pt_base != 0) )
+ {
+ ERROR("Domain is already constructed");
+ return 1;
+ }
+
if ( setup_guestos(domain_id, kernel_fd, initrd_fd, tot_pages,
load_addr, ksize, &launch_op.u.builddomain,
- argc, argv, args_start) < 0 )
+ argc, argv, args_start,
+ op.u.getdomaininfo.shared_info_frame) < 0 )
return 1;
if ( initrd_fd >= 0 )
ctxt = &launch_op.u.builddomain.ctxt;
+ ctxt->flags = 0;
+
/*
* Initial register values:
* DS,ES,FS,GS = FLAT_RING1_DS
/* No debugging. */
memset(ctxt->debugreg, 0, sizeof(ctxt->debugreg));
- /* Domain time counts from zero. */
- ctxt->domain_time = 0;
+ /* No callback handlers. */
+ ctxt->event_callback_cs = FLAT_RING1_CS;
+ ctxt->event_callback_eip = 0;
+ ctxt->failsafe_callback_cs = FLAT_RING1_CS;
+ ctxt->failsafe_callback_eip = 0;
launch_op.u.builddomain.domain = domain_id;
launch_op.u.builddomain.num_vifs = atoi(argv[3]);
{
switch ( state )
{
- case 0: return "RUNNING";
- case 1: return "INTERRUPTIBLE";
- case 2: return "UNINTERRUPTIBLE";
- case 4: return "STOPPED";
- case 8: return "DYING";
- default: return "UNKNOWN";
+ case DOMSTATE_ACTIVE: return "ACTIVE";
+ case DOMSTATE_STOPPED: return "STOPPED";
+ default: return "UNKNOWN";
}
return NULL;
}
machine_restart(0);
}
-void exit_thread(void)
-{
- /* nothing to do ... */
-}
-
-void flush_thread(void)
-{
- struct task_struct *tsk = current;
-
- memset(tsk->thread.debugreg, 0, sizeof(unsigned long)*8);
- /*
- * Forget coprocessor state..
- */
- clear_fpu(tsk);
- tsk->flags &= ~PF_DONEFPUINIT;
-}
-
-void release_thread(struct task_struct *dead_task)
-{
-}
-
void new_thread(struct task_struct *p,
unsigned long start_pc,
unsigned long start_stack,
{
if (p->state != TASK_STOPPED)
{
- cpu_mask = mark_hyp_event(p, _HYP_EVENT_STOP);
- hyp_event_notify(cpu_mask);
+ cpu_mask = mark_guest_event(p, _EVENT_STOP);
+ guest_event_notify(cpu_mask);
}
put_task_struct(p);
}
strcpy (op.u.getdomaininfo.name, p->name);
op.u.getdomaininfo.processor = p->processor;
op.u.getdomaininfo.has_cpu = p->has_cpu;
- op.u.getdomaininfo.state = p->state;
+ op.u.getdomaininfo.state = DOMSTATE_ACTIVE;
+ if ( (p->state == TASK_STOPPED) || (p->state == TASK_DYING) )
+ op.u.getdomaininfo.state = DOMSTATE_STOPPED;
op.u.getdomaininfo.hyp_events = p->hyp_events;
op.u.getdomaininfo.mcu_advance = p->mcu_advance;
op.u.getdomaininfo.tot_pages = p->tot_pages;
op.u.getdomaininfo.cpu_time = p->cpu_time;
+ op.u.getdomaininfo.shared_info_frame =
+ __pa(p->shared_info) >> PAGE_SHIFT;
if ( p->state == TASK_STOPPED )
{
rmb(); /* Ensure that we see saved register state. */
+ op.u.getdomaininfo.ctxt.flags = 0;
memcpy(&op.u.getdomaininfo.ctxt.i386_ctxt,
&p->shared_info->execution_context,
sizeof(p->shared_info->execution_context));
+ if ( p->flags & PF_DONEFPUINIT )
+ op.u.getdomaininfo.ctxt.flags |= ECF_I387_VALID;
memcpy(&op.u.getdomaininfo.ctxt.i387_ctxt,
&p->thread.i387,
sizeof(p->thread.i387));
memcpy(op.u.getdomaininfo.ctxt.debugreg,
p->thread.debugreg,
sizeof(p->thread.debugreg));
- op.u.getdomaininfo.ctxt.domain_time =
- p->shared_info->domain_time;
+ op.u.getdomaininfo.ctxt.event_callback_cs = p->event_selector;
+ op.u.getdomaininfo.ctxt.event_callback_eip = p->event_address;
+ op.u.getdomaininfo.ctxt.failsafe_callback_cs =
+ p->failsafe_selector;
+ op.u.getdomaininfo.ctxt.failsafe_callback_eip =
+ p->failsafe_address;
}
}
read_unlock_irqrestore(&tasklist_lock, flags);
rwlock_t tasklist_lock __cacheline_aligned = RW_LOCK_UNLOCKED;
struct task_struct *task_hash[TASK_HASH_SIZE];
-/*
- * create a new domain
- */
struct task_struct *do_createdomain(unsigned int dom_id, unsigned int cpu)
{
- int retval, i;
+ int retval;
struct task_struct *p = NULL;
unsigned long flags;
INIT_LIST_HEAD(&p->physdisk_aces);
- SET_GDT_ENTRIES(p, DEFAULT_GDT_ENTRIES);
- SET_GDT_ADDRESS(p, DEFAULT_GDT_ADDRESS);
-
p->addr_limit = USER_DS;
- /*
- * We're basically forcing default RPLs to 1, so that our "what privilege
- * level are we returning to?" logic works.
- */
- p->failsafe_selector = FLAT_RING1_CS;
- p->event_selector = FLAT_RING1_CS;
- p->thread.ss1 = FLAT_RING1_DS;
- for ( i = 0; i < 256; i++ ) p->thread.traps[i].cs = FLAT_RING1_CS;
-
sched_add_domain(p);
INIT_LIST_HEAD(&p->pg_head);
machine_restart(0);
}
- printk("Killing domain %d\n", p->domain);
+ /* Only allow the domain to be destroyed once. */
+ if ( !sched_rem_domain(p) )
+ return;
- sched_rem_domain(p);
+ printk("Killing domain %d\n", p->domain);
unlink_blkdev_info(p);
unlazy_fpu(current);
wmb(); /* All CPUs must see saved info in state TASK_STOPPED. */
set_current_state(TASK_STOPPED);
- clear_bit(_HYP_EVENT_STOP, ¤t->hyp_events);
__enter_scheduler();
}
if ( p->state != TASK_STOPPED )
{
- cpu_mask = mark_hyp_event(p, _HYP_EVENT_STOP);
- hyp_event_notify(cpu_mask);
+ cpu_mask = mark_guest_event(p, _EVENT_STOP);
+ guest_event_notify(cpu_mask);
}
put_task_struct(p);
}
-/* final_setup_guestos is used for final setup and launching of domains other
+/*
+ * final_setup_guestos is used for final setup and launching of domains other
* than domain 0. ie. the domains that are being built by the userspace dom0
* domain builder.
*/
int final_setup_guestos(struct task_struct *p, dom0_builddomain_t *builddomain)
{
- start_info_t * virt_startinfo_addr;
+ start_info_t *virt_startinfo_addr;
unsigned long phys_l2tab;
net_ring_t *shared_rings;
net_vif_t *net_vif;
if ( (p->flags & PF_CONSTRUCTED) )
return -EINVAL;
-
+
+ p->flags &= ~PF_DONEFPUINIT;
+ if ( builddomain->ctxt.flags & ECF_I387_VALID )
+ p->flags |= PF_DONEFPUINIT;
memcpy(&p->shared_info->execution_context,
&builddomain->ctxt.i386_ctxt,
sizeof(p->shared_info->execution_context));
memcpy(p->thread.debugreg,
builddomain->ctxt.debugreg,
sizeof(p->thread.debugreg));
+ p->event_selector = builddomain->ctxt.event_callback_cs;
+ p->event_address = builddomain->ctxt.event_callback_eip;
+ p->failsafe_selector = builddomain->ctxt.failsafe_callback_cs;
+ p->failsafe_address = builddomain->ctxt.failsafe_callback_eip;
/* NB. Page base must already be pinned! */
phys_l2tab = builddomain->ctxt.pt_base;
get_page_type(&frame_table[phys_l2tab>>PAGE_SHIFT]);
get_page_tot(&frame_table[phys_l2tab>>PAGE_SHIFT]);
- /* set up the shared info structure */
+ /* Set up the shared info structure. */
update_dom_time(p->shared_info);
- p->shared_info->domain_time = builddomain->ctxt.domain_time;
- /* we pass start info struct to guest os as function parameter on stack */
virt_startinfo_addr = (start_info_t *)builddomain->virt_startinfo_addr;
- /* we need to populate start_info struct within the context of the
- * new domain. thus, temporarely install its pagetables.
+ /*
+ * We need to populate start_info struct within the context of the new
+ * domain. Thus temporarily install its pagetables.
*/
__cli();
__asm__ __volatile__ (
"mov %%eax,%%cr3" : : "a" (pagetable_val(p->mm.pagetable)));
- virt_startinfo_addr->nr_pages = p->tot_pages;
- virt_startinfo_addr->shared_info = virt_to_phys(p->shared_info);
- virt_startinfo_addr->dom_id = p->domain;
- virt_startinfo_addr->flags = IS_PRIV(p) ? SIF_PRIVILEGED : 0;
-
/* Add virtual network interfaces and point to them in startinfo. */
while (builddomain->num_vifs-- > 0) {
net_vif = create_net_vif(p->domain);
return ret;
}
-/* setup_guestos is used for building dom0 solely. other domains are built in
+/*
+ * setup_guestos is used for building dom0 solely. other domains are built in
* userspace dom0 and final setup is being done by final_setup_guestos.
*/
int setup_guestos(struct task_struct *p, dom0_createdomain_t *params,
printk("DOM%d: Guest OS virtual load address is %08lx\n", dom,
virt_load_address);
+ SET_GDT_ENTRIES(p, DEFAULT_GDT_ENTRIES);
+ SET_GDT_ADDRESS(p, DEFAULT_GDT_ADDRESS);
+
+ /*
+ * We're basically forcing default RPLs to 1, so that our "what privilege
+ * level are we returning to?" logic works.
+ */
+ p->failsafe_selector = FLAT_RING1_CS;
+ p->event_selector = FLAT_RING1_CS;
+ p->thread.ss1 = FLAT_RING1_DS;
+ for ( i = 0; i < 256; i++ )
+ p->thread.traps[i].cs = FLAT_RING1_CS;
+
/*
* WARNING: The new domain must have its 'processor' field
* filled in by now !!
{
__enter_scheduler,
kill_domain,
- stop_domain
};
/* Handle outstanding events for the currently-executing domain. */
#include <xeno/timer.h>
#include <xeno/perfc.h>
-
#undef SCHEDULER_TRACE
#ifdef SCHEDULER_TRACE
#define TRC(_x) _x
}
}
-void sched_rem_domain(struct task_struct *p)
+/*
+ * Move domain @p into TASK_DYING using a cmpxchg retry loop.
+ *
+ * Returns 1 if this call performed the transition, or 0 if the state was
+ * already TASK_DYING when it was examined.
+ */
+int sched_rem_domain(struct task_struct *p)
{
- p->state = TASK_DYING;
+    int expected, observed = p->state;
+
+    for ( ; ; )
+    {
+        expected = observed;
+        if ( expected == TASK_DYING )
+            return 0;
+
+        /* cmpxchg hands back the value it found; a match means we won. */
+        observed = cmpxchg(&p->state, expected, TASK_DYING);
+        if ( observed == expected )
+            return 1;
+    }
}
break;
}
+ case SCHEDOP_stop:
+ {
+ stop_domain();
+ break;
+ }
+
default:
ret = -ENOSYS;
}
unsigned long *frames,
unsigned int entries);
-/* Free all resources held by a thread. */
-extern void release_thread(struct task_struct *);
-/*
- * create a kernel thread without removing it from tasklists
- */
-extern int kernel_thread(int (*fn)(void *), void * arg, unsigned long flags);
-
-/* Copy and release all segment info associated with a VM */
-extern void copy_segments(struct task_struct *p, struct mm_struct * mm);
-extern void release_segments(struct mm_struct * mm);
-
-unsigned long get_wchan(struct task_struct *p);
-#define KSTK_EIP(tsk) (((unsigned long *)(4096+(unsigned long)(tsk)))[1019])
-#define KSTK_ESP(tsk) (((unsigned long *)(4096+(unsigned long)(tsk)))[1022])
-
struct microcode {
unsigned int hdrver;
unsigned int rev;
* indicated by comparing RETURN with OLD.
*/
-#ifdef CONFIG_X86_CMPXCHG
-#define __HAVE_ARCH_CMPXCHG 1
-
static inline unsigned long __cmpxchg(volatile void *ptr, unsigned long old,
unsigned long new, int size)
{
((__typeof__(*(ptr)))__cmpxchg((ptr),(unsigned long)(o),\
(unsigned long)(n),sizeof(*(ptr))))
-#else
-/* Compiling for a 386 proper. Is it worth implementing via cli/sti? */
-#endif
-
/*
* Force strict CPU ordering.
* And yes, this is required on UP too when we're talking
* This makes sure that old versions of dom0 tools will stop working in a
* well-defined way (rather than crashing the machine, for instance).
*/
-#define DOM0_INTERFACE_VERSION 0xAAAA0002
+#define DOM0_INTERFACE_VERSION 0xAAAA0003
/*
*/
typedef struct full_execution_context_st
{
+#define ECF_I387_VALID (1<<0) /* Mirrors PF_DONEFPUINIT (FPU initialised) */
+ unsigned long flags; /* ECF_* flag bits */
execution_context_t i386_ctxt; /* User-level CPU registers */
char i387_ctxt[256]; /* User-level FPU registers */
trap_info_t trap_ctxt[256]; /* Virtual IDT */
unsigned long ring1_ss, ring1_esp; /* Virtual TSS (only SS1/ESP1) */
unsigned long pt_base; /* CR3 (pagetable base) */
unsigned long debugreg[8]; /* DB0-DB7 (debug registers) */
- u64 domain_time; /* Domain virtual time */
+ unsigned long event_callback_cs; /* CS:EIP of event callback */
+ unsigned long event_callback_eip;
+ unsigned long failsafe_callback_cs; /* CS:EIP of failsafe callback */
+ unsigned long failsafe_callback_eip;
} full_execution_context_t;
#define MAX_CMD_LEN 256
char name[MAX_DOMAIN_NAME];
int processor;
int has_cpu;
+#define DOMSTATE_ACTIVE 0
+#define DOMSTATE_STOPPED 1
int state;
int hyp_events;
unsigned long mcu_advance;
unsigned int tot_pages;
long long cpu_time;
+ unsigned long shared_info_frame; /* MFN of shared_info struct */
full_execution_context_t ctxt;
} dom0_getdomaininfo_t;
#define EVENT_DEBUG 0x08 /* Request guest to dump debug info (gross!) */
#define EVENT_NET 0x10 /* There are packets for transmission. */
#define EVENT_PS2 0x20 /* PS/2 keyboard or mouse event(s) */
+#define EVENT_STOP 0x40 /* Prepare for stopping and possible pickling */
/* Bit offsets, as opposed to the above masks. */
#define _EVENT_BLKDEV 0
#define _EVENT_DEBUG 3
#define _EVENT_NET 4
#define _EVENT_PS2 5
+#define _EVENT_STOP 6
/*
* Virtual addresses beyond this are not modifiable by guest OSes. The
*/
#define SCHEDOP_yield 0
#define SCHEDOP_exit 1
+#define SCHEDOP_stop 2
#define _HYP_EVENT_NEED_RESCHED 0
#define _HYP_EVENT_DIE 1
-#define _HYP_EVENT_STOP 2
#define PF_DONEFPUINIT 0x1 /* Has the FPU been initialised for this task? */
#define PF_USEDFPU 0x2 /* Has this task used the FPU since last save? */
* arbitrary event or timer.
* TASK_STOPPED: Domain is sopped.
* TASK_DYING: Domain is about to cross over to the land of the dead.
- *
- * If you update these then please update the mapping to text names in
- * xi_list.
*/
#define TASK_RUNNING 0
unsigned long start_pc,
unsigned long start_stack,
unsigned long start_info);
-extern void flush_thread(void);
-extern void exit_thread(void);
/* Linux puts these here for some reason! */
extern int request_irq(unsigned int,
void scheduler_init(void);
void schedulers_start(void);
void sched_add_domain(struct task_struct *p);
-void sched_rem_domain(struct task_struct *p);
+int sched_rem_domain(struct task_struct *p);
long sched_bvtctl(unsigned long ctx_allow);
long sched_adjdom(int dom, unsigned long mcu_adv, unsigned long warp,
unsigned long warpl, unsigned long warpu);
* Point at the empty zero page to start with. We map the real shared_info
* page as soon as fixmap is up and running.
*/
-shared_info_t *HYPERVISOR_shared_info = empty_zero_page;
+shared_info_t *HYPERVISOR_shared_info = (shared_info_t *)empty_zero_page;
unsigned long *phys_to_machine_mapping;
}
__initcall(setup_death_event);
+
+
+/******************************************************************************
+ * Stop/pickle callback handling.
+ */
+
+/*
+ * Handler registered for _EVENT_STOP. None of its three arguments are used;
+ * it simply issues the HYPERVISOR_stop() hypercall.
+ */
+static void time_to_stop(int irq, void *unused, struct pt_regs *regs)
+{
+ HYPERVISOR_stop();
+}
+
+/*
+ * Registered with __initcall(): binds time_to_stop() to _EVENT_STOP under
+ * the name "stop". Always returns 0.
+ */
+static int __init setup_stop_event(void)
+{
+ /* The request_irq() result is explicitly discarded by the (void) cast. */
+ (void)request_irq(_EVENT_STOP, time_to_stop, 0, "stop", NULL);
+ return 0;
+}
+
+__initcall(setup_stop_event);
+
return ret;
}
+/*
+ * Issue the __HYPERVISOR_sched_op hypercall with the SCHEDOP_stop sub-command.
+ * The hypercall number is passed in EAX ("0" ties it to the "=a" output) and
+ * the sub-command in EBX; whatever the trap leaves in EAX is returned.
+ */
+static inline int HYPERVISOR_stop(void)
+{
+ int ret;
+ __asm__ __volatile__ (
+ TRAP_INSTR
+ : "=a" (ret) : "0" (__HYPERVISOR_sched_op),
+ "b" (SCHEDOP_stop) );
+
+ return ret;
+}
+
static inline int HYPERVISOR_dom0_op(dom0_op_t *dom0_op)
{
int ret;